{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Logistic Regression on Large Data"
      ],
      "metadata": {
        "nteract": {
          "transient": {
            "deleting": false
          }
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yfinance is used directly through yf.download, so no pandas_datareader patching is needed\n",
        "import yfinance as yf"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:29.643Z",
          "iopub.execute_input": "2020-07-26T04:39:29.650Z",
          "iopub.status.idle": "2020-07-26T04:39:30.731Z",
          "shell.execute_reply": "2020-07-26T04:39:30.763Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Input parameters\n",
        "symbol = 'AMD'\n",
        "start = '1980-01-01'\n",
        "end = '2020-07-24'\n",
        "\n",
        "# Download the daily price history for `symbol` from Yahoo Finance.\n",
        "# auto_adjust=False is passed explicitly so that the 'Adj Close' column used by\n",
        "# the following cells is present regardless of the library's default setting.\n",
        "dataset = yf.download(symbol, start=start, end=end, auto_adjust=False)\n",
        "\n",
        "# Preview the first rows (all downloaded columns are kept)\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Adj Close     Close      High       Low  Open  Volume\nDate                                                             \n1980-03-17   3.145833  3.145833  3.302083  3.125000   0.0  219600\n1980-03-18   3.031250  3.031250  3.125000  2.937500   0.0  727200\n1980-03-19   3.041667  3.041667  3.083333  3.020833   0.0  295200\n1980-03-20   3.010417  3.010417  3.062500  3.010417   0.0  159600\n1980-03-21   2.916667  2.916667  3.020833  2.906250   0.0  130800",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1980-03-17</th>\n      <td>3.145833</td>\n      <td>3.145833</td>\n      <td>3.302083</td>\n      <td>3.125000</td>\n      <td>0.0</td>\n      <td>219600</td>\n    </tr>\n    <tr>\n      <th>1980-03-18</th>\n      <td>3.031250</td>\n      <td>3.031250</td>\n      <td>3.125000</td>\n      <td>2.937500</td>\n      <td>0.0</td>\n      <td>727200</td>\n    </tr>\n    <tr>\n      <th>1980-03-19</th>\n      <td>3.041667</td>\n      <td>3.041667</td>\n      <td>3.083333</td>\n      <td>3.020833</td>\n      <td>0.0</td>\n      <td>295200</td>\n    </tr>\n    <tr>\n      <th>1980-03-20</th>\n      <td>3.010417</td>\n      <td>3.010417</td>\n      <td>3.062500</td>\n      <td>3.010417</td>\n      <td>0.0</td>\n      <td>159600</td>\n    </tr>\n    <tr>\n      <th>1980-03-21</th>\n      <td>2.916667</td>\n      <td>2.916667</td>\n      <td>3.020833</td>\n      <td>2.906250</td>\n      <td>0.0</td>\n      <td>130800</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:30.742Z",
          "iopub.execute_input": "2020-07-26T04:39:30.750Z",
          "iopub.status.idle": "2020-07-26T04:39:32.572Z",
          "shell.execute_reply": "2020-07-26T04:39:32.791Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Binary next-day direction flags: 1 when the next row's value is strictly\n",
        "# higher than the current row's value, otherwise 0.\n",
        "# NOTE: the final row has no next day, so shift(-1) is NaN there and all three\n",
        "# flags fall back to 0 for that row.\n",
        "direction_flags = (\n",
        "    ('Increase_Decrease', 'Volume'),\n",
        "    ('Buy_Sell_on_Open', 'Open'),\n",
        "    ('Buy_Sell', 'Adj Close'),\n",
        ")\n",
        "for flag_name, source_col in direction_flags:\n",
        "    current = dataset[source_col]\n",
        "    dataset[flag_name] = np.where(current.shift(-1) > current, 1, 0)\n",
        "\n",
        "# Row-over-row percentage change of the adjusted close. The first row has no\n",
        "# predecessor, so its return is NaN; dropna() removes it along with any other\n",
        "# rows that contain missing values.\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:32.588Z",
          "iopub.execute_input": "2020-07-26T04:39:32.596Z",
          "iopub.status.idle": "2020-07-26T04:39:32.613Z",
          "shell.execute_reply": "2020-07-26T04:39:32.798Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Show the last rows, including the derived flag and return columns\n",
        "dataset.tail()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/plain": "            Adj Close      Close       High        Low       Open     Volume  \\\nDate                                                                           \n2020-07-17  55.040001  55.040001  55.810001  54.680000  55.310001   34710400   \n2020-07-20  57.459999  57.459999  57.529999  54.830002  55.230000   45034300   \n2020-07-21  57.000000  57.000000  58.500000  56.320000  57.810001   44800700   \n2020-07-22  61.790001  61.790001  62.000000  56.970001  57.070000  135159400   \n2020-07-23  59.570000  59.570000  62.330002  58.630001  61.630001  106829100   \n\n            Increase_Decrease  Buy_Sell_on_Open  Buy_Sell   Returns  \nDate                                                                 \n2020-07-17                  1                 0         1  0.002185  \n2020-07-20                  0                 1         0  0.043968  \n2020-07-21                  1                 0         1 -0.008006  \n2020-07-22                  0                 1         0  0.084035  \n2020-07-23                  0                 0         0 -0.035928  ",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Returns</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2020-07-17</th>\n      <td>55.040001</td>\n      <td>55.040001</td>\n      <td>55.810001</td>\n      <td>54.680000</td>\n      <td>55.310001</td>\n      <td>34710400</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0.002185</td>\n    </tr>\n    <tr>\n      <th>2020-07-20</th>\n      <td>57.459999</td>\n      <td>57.459999</td>\n      <td>57.529999</td>\n      <td>54.830002</td>\n      <td>55.230000</td>\n      <td>45034300</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.043968</td>\n    </tr>\n    <tr>\n      <th>2020-07-21</th>\n      <td>57.000000</td>\n      <td>57.000000</td>\n      <td>58.500000</td>\n      <td>56.320000</td>\n      <td>57.810001</td>\n      <td>44800700</td>\n      <td>1</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-0.008006</td>\n    </tr>\n    <tr>\n      <th>2020-07-22</th>\n      <td>61.790001</td>\n      <td>61.790001</td>\n      <td>62.000000</td>\n      <td>56.970001</td>\n      <td>57.070000</td>\n      <td>135159400</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.084035</td>\n  
  </tr>\n    <tr>\n      <th>2020-07-23</th>\n      <td>59.570000</td>\n      <td>59.570000</td>\n      <td>62.330002</td>\n      <td>58.630001</td>\n      <td>61.630001</td>\n      <td>106829100</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>-0.035928</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:32.634Z",
          "iopub.execute_input": "2020-07-26T04:39:32.644Z",
          "iopub.status.idle": "2020-07-26T04:39:32.673Z",
          "shell.execute_reply": "2020-07-26T04:39:33.397Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# (rows, columns) of the frame after the derived columns were added and dropna() was applied\n",
        "dataset.shape"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": "(10175, 10)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:32.696Z",
          "iopub.execute_input": "2020-07-26T04:39:32.713Z",
          "iopub.status.idle": "2020-07-26T04:39:32.735Z",
          "shell.execute_reply": "2020-07-26T04:39:33.401Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import LogisticRegression\n",
        "from sklearn.preprocessing import StandardScaler"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:32.751Z",
          "iopub.execute_input": "2020-07-26T04:39:32.759Z",
          "iopub.status.idle": "2020-07-26T04:39:33.307Z",
          "shell.execute_reply": "2020-07-26T04:39:33.405Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Select features and target by column name rather than by position, so the\n",
        "# split does not depend on the column order returned by the data source.\n",
        "feature_columns = [\n",
        "    'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume',\n",
        "    'Increase_Decrease', 'Buy_Sell_on_Open',\n",
        "]\n",
        "target_column = 'Buy_Sell'\n",
        "\n",
        "# Feature matrix: the 8 columns listed above, in that order\n",
        "X = dataset[feature_columns].values\n",
        "# Target vector: 1 when the next adjusted close is higher, otherwise 0\n",
        "y = dataset[target_column].values"
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.319Z",
          "iopub.execute_input": "2020-07-26T04:39:33.329Z",
          "iopub.status.idle": "2020-07-26T04:39:33.341Z",
          "shell.execute_reply": "2020-07-26T04:39:33.409Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Standardize features: fit the scaler on X and transform X in a single step\n",
        "scaler = StandardScaler()\n",
        "X_std = scaler.fit_transform(X)"
      ],
      "outputs": [],
      "execution_count": 8,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.351Z",
          "iopub.execute_input": "2020-07-26T04:39:33.360Z",
          "iopub.status.idle": "2020-07-26T04:39:33.373Z",
          "shell.execute_reply": "2020-07-26T04:39:33.413Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Create logistic regression object using the 'sag' solver, with a fixed random_state\n",
        "clf = LogisticRegression(random_state=0, solver='sag')\n",
        "\n",
        "# Train the model on the standardized features; `model` holds the value returned by fit()\n",
        "model = clf.fit(X_std, y)"
      ],
      "outputs": [],
      "execution_count": 9,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.382Z",
          "iopub.execute_input": "2020-07-26T04:39:33.387Z",
          "iopub.status.idle": "2020-07-26T04:39:33.678Z",
          "shell.execute_reply": "2020-07-26T04:39:33.854Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fitted coefficients, one per column of X_std\n",
        "model.coef_"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "array([[-2.06282781, -2.06282781,  2.94853819,  1.85910412, -0.68592341,\n         0.05855013,  0.15793275,  0.32223378]])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.689Z",
          "iopub.execute_input": "2020-07-26T04:39:33.698Z",
          "iopub.status.idle": "2020-07-26T04:39:33.717Z",
          "shell.execute_reply": "2020-07-26T04:39:33.860Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Fitted intercept term of the model\n",
        "model.intercept_"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 11,
          "data": {
            "text/plain": "array([-0.12478437])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 11,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.728Z",
          "iopub.execute_input": "2020-07-26T04:39:33.735Z",
          "iopub.status.idle": "2020-07-26T04:39:33.749Z",
          "shell.execute_reply": "2020-07-26T04:39:33.865Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model.score(X, y)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 12,
          "data": {
            "text/plain": "0.46958230958230956"
          },
          "metadata": {}
        }
      ],
      "execution_count": 12,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2020-07-26T04:39:33.757Z",
          "iopub.execute_input": "2020-07-26T04:39:33.764Z",
          "iopub.status.idle": "2020-07-26T04:39:33.780Z",
          "shell.execute_reply": "2020-07-26T04:39:33.870Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "version": "3.5.5",
      "pygments_lexer": "ipython3",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "file_extension": ".py",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      }
    },
    "kernelspec": {
      "argv": [
        "C:\\Users\\Tin Hang\\Anaconda3\\envs\\py35\\python.exe",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}"
      ],
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "nteract": {
      "version": "0.24.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}